/* 
   This program imports the CART imputation .csv files for the FHS concrete sample
   (years 1977, 1982, 1987 and 1992) into SAS, merges the PPN and id (firm id) onto
   the imputes for each year, and saves the stacked result as fhs7797.conc_imputes.

*/

%include "ASMimplibs.sas";



%MACRO import_imputes(ind,yr);
  /*
     Read the comma-delimited file "&ind.&yr._imputes.csv" into the data set
     WORK.&ind._imputes&yr.

     Parameters:
       ind - prefix of the .csv file name and of the output data set name
       yr  - year suffix used in the file name and the data set name

     The first record of the file is skipped (firstobs=2). number and fdeath
     are read as character; every other column is read as numeric.

     If any record sets _ERROR_ during the read, the macro variable _EFIERR_
     is set to 1 and a WARNING is written to the log once the step has run.
     Previously the flag was set but never looked at.
  */

            %let _EFIERR_ = 0; /* reset the ERROR detection macro variable */

            data WORK.&ind._imputes&yr;
            infile "&ind.&yr._imputes.csv" delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;

               /* Variables are listed in file column order; this order also */
               /* fixes the variable order in the output data set.           */
               informat number $5.
                        TVS TE PH SW PQS kseq ksst best32.
                        fdeath $1.
                        plant_cm energycmratio cpcmratio wwswratio pvtvsratio impsetnum best32. ;

               format   number $5.
                        TVS TE PH SW PQS kseq ksst best12.
                        fdeath $1.
                        plant_cm energycmratio cpcmratio wwswratio pvtvsratio impsetnum best12. ;

               input number $ TVS TE PH SW PQS kseq ksst fdeath $
                     plant_cm energycmratio cpcmratio wwswratio pvtvsratio impsetnum ;

               if _ERROR_ then call symputx('_EFIERR_',1);  /* set ERROR detection macro variable */
            run;

            /* Report a flagged read; the explicit RUN above makes the step */
            /* execute before this %IF is evaluated.                        */
            %if &_EFIERR_ %then
               %put WARNING: import_imputes: _ERROR_ was set while reading &ind.&yr._imputes.csv - check the log notes above.;
%MEND ;

/* Import the concrete imputes, one call to import_imputes per year suffix, */
/* in the order the suffixes are listed.                                    */
%macro run_import_imputes(years);
  %local k;
  %let k = 1;
  %do %while (%scan(&years, &k) ne );
    %import_imputes(concrete, %scan(&years, &k))
    %let k = %eval(&k + 1);
  %end;
%mend run_import_imputes;

%run_import_imputes(77 82 87 92)


%MACRO merge_ids(industry,ind,yr);
  /*
     Merge ppn and id onto the imputes for one year, then check the result
     against the ppn list taken from db50im.all&ind.

     Parameters:
       industry - prefix of the WORK imputes data set (&industry._imputes&yr);
                  also printed in the PROC FREQ title
       ind      - prefix of fhs7797.&ind.&yr._gooddata_ids, suffix of
                  db50im.all&ind, and prefix of the WORK data sets made here
       yr       - two-digit year; 19&yr is used as the four-digit year

     WORK data sets written:
       &industry._imputes&yr         (replaced: numeric number and year added)
       &ind.&yr._ids                 (number year ppn id pqs_imp_mean)
       &ind.&yr._imputes_after_merge (rows present in both ids and imputes)
       CB&ind&yr, test&ind.&yr, rejCB&ind&yr (match check)
  */

  /* Replace the character number by a numeric one and add a numeric year.   */
  /* KEEP= is applied before RENAME=, so the new variables can take over the */
  /* names number and year without a separate PROC DATASETS step.            */
  data &industry._imputes&yr
       (keep   = NUMBER_NUM year_num TVS TE PH SW PQS kseq ksst fdeath plant_cm
                 energycmratio cpcmratio wwswratio pvtvsratio impsetnum
        rename = (NUMBER_NUM = number  year_num = year));
   set &industry._imputes&yr;
   NUMBER_NUM = input(NUMBER,5.);
   year_num   = 19&yr;
  run;

  PROC SORT DATA=&industry._imputes&yr;
   by NUMBER;
  RUN;


  /** Merge ids **/

 data &ind.&yr._ids (keep = number year ppn id pqs_imp_mean );
  set fhs7797.&ind.&yr._gooddata_ids ;
 run;

 proc sort data=&ind.&yr._ids; by number; run;

  /* Keep only rows found in both inputs; impsetnum is stored as _IMPUTE_. */
  data &ind.&yr._imputes_after_merge (rename = (impsetnum = _IMPUTE_));
   merge &ind.&yr._ids (in=inids) &industry._imputes&yr (in=inimp);
   by NUMBER;
   if inids and inimp;
  run;

  /* Check to make sure we got all the PPNs matched up correctly: */
  /* Note: rejCB&ind&yr should be empty. */

  data CB&ind&yr (keep = ppn ) ;
   set db50im.all&ind;
   if year= 19&yr;
  run;

  proc sort data=CB&ind&yr; by ppn; run;
  proc sort data=&ind.&yr._imputes_after_merge; by ppn; run;

  data test&ind.&yr rejCB&ind&yr;
    merge &ind.&yr._imputes_after_merge (in=inCART) CB&ind&yr (in=inCB);
    by ppn;
    if inCART and inCB then output test&ind.&yr;
    else if inCB then output rejCB&ind&yr;
  run;

  proc freq data=rejCB&ind&yr;
    tables ppn;
  title1 "PPNs in original gooddata for 19&yr &industry";
  title2 "that did not match after merging PPNs to CART imputes";
  run;

  /* Clear the titles so they do not label later, unrelated output. */
  title;

  /* Enforce the "should be empty" expectation: write a log WARNING when */
  /* rejCB&ind&yr has any rows. NOBS= is filled in at compile time, so   */
  /* no row is actually read.                                            */
  data _null_;
    if 0 then set rejCB&ind&yr nobs=n_unmatched;
    if n_unmatched > 0 then
      put "WARNING: merge_ids: " n_unmatched "record(s) from db50im.all&ind for 19&yr did not match the CART imputes (see rejCB&ind&yr).";
    stop;
  run;

%MEND;


/* Run merge_ids for the concrete imputes (ids prefix concf), once per year */
/* suffix, in the order the suffixes are listed.                            */
%macro run_merge_ids(years);
  %local k;
  %let k = 1;
  %do %while (%scan(&years, &k) ne );
    %merge_ids(concrete, concf, %scan(&years, &k))
    %let k = %eval(&k + 1);
  %end;
%mend run_merge_ids;

%run_merge_ids(77 82 87 92)


/* Stack the four yearly merged data sets, in year order, into one */
/* permanent data set.                                             */
data fhs7797.conc_imputes;
    set
        concf77_imputes_after_merge
        concf82_imputes_after_merge
        concf87_imputes_after_merge
        concf92_imputes_after_merge
    ;
run;

 
